In this notebook we inspect how we can use the BNPR fits from phylodyn to model annual growth.

0.0.0.1 Setup

source("Scripts/vaf_dynamics_functions.R")
## 
## Attaching package: 'greta'
## The following objects are masked from 'package:stats':
## 
##     binomial, cov2cor, poisson
## The following objects are masked from 'package:base':
## 
##     %*%, apply, backsolve, beta, chol2inv, colMeans, colSums, diag,
##     eigen, forwardsolve, gamma, identity, rowMeans, rowSums, sweep,
##     tapply
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✔ tibble  3.0.4     ✔ dplyr   1.0.2
## ✔ tidyr   1.1.2     ✔ stringr 1.4.0
## ✔ readr   1.4.0     ✔ forcats 0.5.0
## ✔ purrr   0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ✖ dplyr::slice()  masks greta::slice()
## This is bayesplot version 1.7.2
## - Online documentation and vignettes at mc-stan.org/bayesplot
## - bayesplot theme set to bayesplot::theme_default()
##    * Does _not_ affect other ggplot2 plots
##    * See ?bayesplot_theme_set for details on theme setting
## 
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggpubr':
## 
##     get_legend
## 
## Attaching package: 'extraDistr'
## The following objects are masked from 'package:gtools':
## 
##     ddirichlet, rdirichlet
## The following object is masked from 'package:purrr':
## 
##     rdunif
## 
## ---------------------
## Welcome to dendextend version 1.14.0
## Type citation('dendextend') for how to cite the package.
## 
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
## 
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## Or contact: <tal.galili@gmail.com>
## 
##  To suppress this message use:  suppressPackageStartupMessages(library(dendextend))
## ---------------------
## 
## Attaching package: 'dendextend'
## The following object is masked from 'package:ggpubr':
## 
##     rotate
## The following object is masked from 'package:stats':
## 
##     cutree
## 
## Attaching package: 'ape'
## The following objects are masked from 'package:dendextend':
## 
##     ladderize, rotate
## The following object is masked from 'package:ggpubr':
## 
##     rotate
## Registered S3 method overwritten by 'treeio':
##   method     from
##   root.phylo ape
## ggtree v2.0.4  For help: https://yulab-smu.github.io/treedata-book/
## 
## If you use ggtree in published research, please cite the most appropriate paper(s):
## 
## - Guangchuang Yu, Tommy Tsan-Yuk Lam, Huachen Zhu, Yi Guan. Two methods for mapping and visualizing associated data on phylogeny using ggtree. Molecular Biology and Evolution 2018, 35(12):3041-3043. doi: 10.1093/molbev/msy194
## - Guangchuang Yu, David Smith, Huachen Zhu, Yi Guan, Tommy Tsan-Yuk Lam. ggtree: an R package for visualization and annotation of phylogenetic trees with their covariates and other associated data. Methods in Ecology and Evolution 2017, 8(1):28-36, doi:10.1111/2041-210X.12628
## 
## Attaching package: 'ggtree'
## The following object is masked from 'package:ape':
## 
##     rotate
## The following object is masked from 'package:dendextend':
## 
##     rotate
## The following object is masked from 'package:tidyr':
## 
##     expand
## The following object is masked from 'package:ggpubr':
## 
##     rotate
## 
## Attaching package: 'reghelper'
## The following object is masked from 'package:greta':
## 
##     beta
## The following object is masked from 'package:base':
## 
##     beta
source("Scripts/make_ultrametric.R")

library(parallel)
library(Matrix)
## 
## Attaching package: 'Matrix'
## The following object is masked from 'package:ggtree':
## 
##     expand
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
library(castor)
## Loading required package: Rcpp
library(phangorn)
library(phylodyn)
library(minpack.lm)
INLA:::inla.dynload.workaround()

change_point <- function(x, b0, m1, m2, delta) { 
  b0 + (x*m1) + (sapply(x-delta, function (t) max(0, t)) * m2)
}

plot_tree <- function(obj) { 
  tree_ultra <- obj$tree_ultra
  tree_ultra$edge.length[is.infinite(tree_ultra$edge.length)] <- 0
  tree_ultra$tip.label <- obj$tree$S
  driver_list <- Filter(function(x) length(x) > 5,obj$driver_list)
  if (length(driver_list) > 1) {
    colours <- RColorBrewer::brewer.pal(length(driver_list),"Set3")
  } else {
    colours <- c("black")
  }
  colour_code <- rep("black",length(tree_ultra$tip.label))
  R <- 1:length(driver_list)
  if (length(driver_list) == 0) {
    R <- numeric(0)
  } 
  for (i in R) {
    D <- driver_list[[i]]
    colour_code[tree_ultra$tip.label %in% unique(D)] <- colours[i]
  }
  plot(tree_ultra,tip.color = colour_code)
}

clade_from_mrca <- function(tree,mrca) {
  edge_idx <- which(tree$edge[,1] == mrca)
  continue <- T
  output <- c(which(tree$edge[,2] == mrca))
  while (continue == T) {
    if (length(edge_idx) > 0) {
      output <- c(output,edge_idx)
      new_edge_idx <- c()
      for (idx in edge_idx) {
        nodes <- tree$edge[idx,2]
        new_idxs <- which(tree$edge[,1] == nodes)
        new_edge_idx <- c(new_edge_idx,new_idxs)
      }
      edge_idx <- new_edge_idx
    } else {
      continue <- F
    }
  }
  return(output)
}

build_tree <- function(sub_data,subsample_size=100,
                       detection_threshold=0) {
  sub_af <- sub_data %>% 
    group_by(CloneID) %>% 
    summarise(N = max(NClones),.groups = "drop") %>%
    arrange(CloneID)
  MaxClone <- max(sub_data$CloneID)
  MaxMut <- max(sub_data$MutID)
  sub_af_sparse <- rep(0,MaxClone)
  sub_af_sparse[sub_af$CloneID] <- sub_af$N 
  
  Presence <- sparseMatrix(i = sub_data$CloneID,
                           j = sub_data$MutID,
                           dims = c(MaxClone,MaxMut))
  Presence[cbind(sub_data$CloneID,sub_data$MutID)] <- 1

  if (sum(as.logical(sub_af_sparse)) < subsample_size) {
    sub_af_sparse <- rep(1,length(sub_af_sparse))
  }
  
  S <- sample(MaxClone,subsample_size,replace = F,prob = sub_af_sparse)
  af <- sub_af_sparse[S]
  Presence <- Presence[S,]
  Presence <- as.matrix(rbind(Presence,wt=0))
  Presence <- Presence[,colSums(Presence) > 0]
  
  dst <- as.matrix(dist(Presence > 0,method = "manhattan"))
  tree <- root(njs(dst),
               outgroup = "wt",
               resolve.root = T,
               edgelabel = T) 
  tree <- drop.tip(tree,"wt")
  return(list(tree = tree,S = S,af = af))
}

read_clonex <- function(path,d = 1000) {
  x <- read_tsv(path,
                col_names = c("Gen","NClones","CloneID","MutID"),
                col_types = c(col_integer(),col_integer(),col_integer(),col_integer()),
                progress = F) %>%
    mutate(Driver = MutID <= d)
  return(x)
}

trajectory_from_td <- function(x=0:1000, td=0, s=0.01, N=2e5, g=1){
  s <- s/g
  t0 <- td - 1/(s)
  t <- x-t0
  t <- t*g
  y <- pmax(0,t)
  if (s==0)
    return(y)
  td <- 1/(s)
  te <- log(N*s -1)/s + td
  y[t>td] <- N/(1+exp(-(s*(t[t>td]-te))))
  return(y)
}

trajectory_from_t0 <- function(x=0:1000, t0=0, s=0.01, N=2e5, g=1){
  s <- s/g
  t <- x-t0
  t <- t*g
  y <- pmax(0,t)
  if (s==0)
    return(y)
  td <- 1/(s)
  te <- log(N*s -1)/s + td
  y[t>td] <- N/(1+exp(-(s*(t[t>td]-te))))
  return(y)
}

1 BNPR

In this section we calculate/load all BNPR trajectories for the trees inferred from Wright-Fisher (WF) simulations. These simulations are done for 6 different fitness levels - 0.005, 0.010, 0.015, 0.020, 0.025 and 0.030 - over 800 generations and with a fixed population size (200,000).

1.1 Loading data and inferring trees

The first step is to build the trees, which we then display. One can immediately see that expansions become increasingly prevalent for higher fitness effects, with quite a few cases of a single clone sweeping all of the population.

N <- 2e5
N_DRIVERS <- 20
all_file_paths <- list.files(
  "hsc_output_bnpr_complete",pattern = "last_generation",
  full.names = T,recursive = T)
all_driver_file_paths <- list.files(
  "hsc_output_bnpr_complete",pattern = "driver",
  full.names = T,recursive = T)

file_name <- "data_output/simulated_trees_trajectories.RDS"
if (file.exists(file_name) == T) {
  tree_traj <- readRDS(file_name)
  trees <- tree_traj[[1]]
  all_driver_trajectories <- tree_traj[[2]]
} else {
  all_driver_trajectories <- all_driver_file_paths %>%
    lapply(function(x) {
      read.csv(x,header = F) %>%
        arrange(V1,V2) %>%
        select(Gen = V1,MutID = V2,Count = V3)})
  
  trees <- mclapply(
    all_file_paths,
    mc.cores = 6,
    mc.cleanup = T,
    function(path) {
      s <- str_match(path,"hsc_[0-9.]+") %>%
        gsub(pattern = "hsc_",replacement = "") %>%
        as.numeric()
      system(sprintf('echo "%s"', path)) # prints during mclapply by using bash
      x <- read_clonex(path,d = N_DRIVERS) %>%
        subset(Gen == 800) %>%
        subset(MutID != 0)
      x$R <- as.numeric(str_match(path,"[0-9.]+$"))
      x$s <- s
      x$drift_threshold <- 1 / (N * (s))
      
      driver_list <- x %>% 
        subset(Driver == T) %>% 
        select(CloneID,MutID) 
      driver_list_out <- list()
      for (driver_id in unique(driver_list$MutID)) {
        tmp <- driver_list %>%
          subset(MutID == driver_id) %>%
          select(CloneID) %>%
          unlist
        driver_list_out[[as.character(driver_id)]] <- tmp
      }
      tree <- x %>%
        build_tree()
      ultrametric_tree <- make.ultrametric.tree(tree$tree)
      gc()
      list(
        tree = tree,
        tree_ultra = ultrametric_tree,
        driver_list = driver_list_out,
        path = path
      ) %>%
        return
    }
  )
  
  names(trees) <- gsub('/last_generation','',all_file_paths)
  names(all_driver_trajectories) <- gsub('/driver_trajectory','',all_driver_file_paths)
  saveRDS(object = list(trees,all_driver_trajectories),file = file_name)
}
par(mfrow = c(4,5),mar = c(2,0.5,1,0.5))
for (tree_name in names(trees)) {
  tree <- trees[[tree_name]]
  fitness <- str_match(tree_name,"[0-9.]+")
  plot(tree$tree_ultra,main = fitness)
}

1.2 Calculating BNPR trajectories for the whole trees

Here we calculate the actual EPS trajectories using BNPR and display them.

file_name <- "data_output/simulated_bnpr_trees.RDS"
if (file.exists(file_name) == T) {
  all_estimates_trees <- readRDS(file_name)
} else {
  all_estimates_trees <- mclapply(
    trees,
    mc.cores = 2, # do not go over 2 cores here, may lead to OOM errors
    mc.cleanup = T,
    function(x) {
      system(sprintf('echo "%s"', x$path)) # prints during mclapply by using bash
      BNPR(x$tree_ultra) %>%
        return
      }
    )
  saveRDS(all_estimates_trees,file_name)
}

1.3 Calculating mcmc.popsize and skyline trajectories for the whole trees

We accompany our BNPR estimates with those obtained through mcmc.popsize and skyline estimations to prove that what we are observing are not artefacts specific to BNPR. Interestingly, mcmc.popsize appears to suffer from its own bias - in some trajectories a very large initial EPS followed by a dip is observable. This holds for both possible priors - constant population size and a skyline-process population size, with the latter ameliorating this effect slightly.

all_estimates_trees_skyline <- mclapply(
    trees,
    mc.cores = 2,
    mc.cleanup = T,
    function(x) skyline(x$tree_ultra,epsilon = 0.01)
    )

all_estimates_trees_skyline_wide <- mclapply(
    trees,
    mc.cores = 2,
    mc.cleanup = T,
    function(x) skyline(x$tree_ultra,epsilon = 0.05)
    )

all_estimates_trees_mcmc <- mclapply(
    trees,
    mc.cores = 2, 
    mc.cleanup = T,
    function(x) mcmc.popsize(x$tree_ultra,nstep=10000,thinning = 100,
                             progress.bar = F,
                             burn.in = 1000,lambda = 0.1,
                             method.prior.heights = "constant")
    )

all_estimates_trees_mcmc_skyline <- mclapply(
    trees,
    mc.cores = 2, 
    mc.cleanup = T,
    function(x) mcmc.popsize(x$tree_ultra,nstep=10000,thinning = 100,
                             progress.bar = F,
                             burn.in = 1000,lambda = 0.1,
                             method.prior.heights = "skyline")
    )
par(mfrow = c(4,5),mar = c(1.5,2,2,1))
for (name in names(all_estimates_trees)) {
  plot_BNPR(all_estimates_trees[[name]])
  lines(all_estimates_trees_skyline[[name]],col = "green")
  lines(all_estimates_trees_skyline_wide[[name]],col = "#026440")
  lines(extract.popsize(all_estimates_trees_mcmc[[name]]),col = "red")
  lines(extract.popsize(all_estimates_trees_mcmc_skyline[[name]]),col = "purple")
}

1.4 Calculating BNPR trajectories for each clade

file_name <- "data_output/simulated_bnpr_clades.RDS"
if (file.exists(file_name) == T) {
  all_estimates <- readRDS(file_name)
} else {
  all_estimates <- list()
  i <- 1
  for (obj_name in names(trees)) {
    obj <- trees[[obj_name]]
    print(obj_name)
    s <- str_match(obj_name,"[0-9.]+") %>%
      as.numeric
    Tree <- list(tree = obj$tree_ultra)
    Tree$tree$tip.label <- obj$tree$S
    Tree$tree$edge.length[is.infinite(Tree$tree$edge.length)] <- 0
    tree <- Tree$tree
    driver_id_list <- Filter(function(x) length(x) > 5,obj$driver_list)
    all_drivers <- do.call(c,driver_id_list)
    clades <- cut_tree(tree = tree,depth = 0.1) %>%
      Filter(f = function(x) length(x) >= 5)
    clades_ <- list()
    for (clade in clades) {
      clade_tips <- tree$tip.label[clade]
      for (driver in names(driver_id_list)) {
        if (any(clade_tips %in% driver_id_list[[driver]])) {
          clades_[[length(clades_)+1]] <- list(
            clade = clade,
            driver = driver
          )
        }
      }
    }
    clades <- clades_
    
    if (!(obj_name %in% names(all_estimates))) {
      bnpr_estimates <- mclapply(
        clades,
        mc.cores = 2,
        mc.cleanup = T,
        function(clad) {
          #tip_in_clade <- which(Tree$tree$tip.label %in% clad)
          tip_in_clade <- clad$clade
          if (length(tip_in_clade) > 4) {
            sub_tree <- keep.tip(Tree$tree,tip_in_clade) %>%
              multi2di()
            if (!is.null(sub_tree)) {
              bnpr_estimates <- NULL
              bnpr_estimate <- BNPR(sub_tree)
              if (!is.null(bnpr_estimate)) {
                list(
                  tree = tree,
                  bnpr = bnpr_estimate,
                  clade = clad$clade,
                  tree = sub_tree,
                  driver = clad$driver) %>%
                  return
              }
            }
          }
        }
      ) %>%
        Filter(f = function(x) !is.null(x))
      all_estimates[[obj_name]] <- bnpr_estimates
    }
    gc()
  }
  saveRDS(all_estimates,file_name)
}

1.5 Calculating mcmc.popsize trajectories for each clade

all_estimates_mcmc <- list()
i <- 1
par(mfrow = c(4,5),mar=c(1,2,2,1))
for (obj_name in names(all_estimates)) {
  print(obj_name)
  if (!(obj_name %in% names(all_estimates_mcmc))) {
    obj <- all_estimates[[obj_name]]
    estimates <- list()
    for (child_obj in obj) {
      Tr <- child_obj$tree %>%
        keep.tip(child_obj$clade) %>%
        multi2di()
      pr <- ifelse(length(child_obj$tree$tip.labels) > 10,"skyline","constant")
      pop_size_eps <- mcmc.popsize(Tr,10000,100,1000,
                                   lambda = 0.1,
                                   progress.bar = F,
                                   method.prior.heights = pr)
      estimates[[length(estimates)+1]] <- list(pop_size_eps = pop_size_eps)
    }
    estimates <- estimates %>%
     Filter(f = function(x) !is.null(x))
    all_estimates_mcmc[[obj_name]] <- estimates
  }
  gc()
}
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_1"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_10"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_11"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_12"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_13"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_14"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_15"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_16"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_17"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_18"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_19"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_2"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_20"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_3"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_4"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_5"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_6"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_7"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_8"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_9"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_1"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_10"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_11"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_12"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_13"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_14"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_15"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_16"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_17"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_18"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_19"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_2"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_20"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_3"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_4"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_5"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_6"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_7"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_8"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_9"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_1"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_10"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_11"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_12"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_13"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_14"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_15"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_16"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_17"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_18"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_19"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_2"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_20"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_3"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_4"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_5"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_6"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_7"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_8"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_9"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_1"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_10"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_11"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_12"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_13"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_14"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_15"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_16"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_17"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_18"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_19"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_2"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_20"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_3"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_4"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_5"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_6"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_7"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_8"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_9"
## [1] "hsc_output_bnpr_complete/hsc_0.025_10_1"
## [1] "hsc_output_bnpr_complete/hsc_0.025_10_10"
## [1] "hsc_output_bnpr_complete/hsc_0.025_10_11"
## [1] "hsc_output_bnpr_complete/hsc_0.025_10_12"
## [1] "hsc_output_bnpr_complete/hsc_0.025_10_13"
## [1] "hsc_output_bnpr_complete/hsc_0.025_10_14"
## [1] "hsc_output_bnpr_complete/hsc_0.025_10_15"
## [1] "hsc_output_bnpr_complete/hsc_0.025_10_16"
## [1] "hsc_output_bnpr_complete/hsc_0.025_10_17"
## [1] "hsc_output_bnpr_complete/hsc_0.025_10_18"
## [1] "hsc_output_bnpr_complete/hsc_0.025_10_19"
## [1] "hsc_output_bnpr_complete/hsc_0.025_10_2"
## [1] "hsc_output_bnpr_complete/hsc_0.025_10_20"
## [1] "hsc_output_bnpr_complete/hsc_0.025_10_3"
## [1] "hsc_output_bnpr_complete/hsc_0.025_10_4"
## [1] "hsc_output_bnpr_complete/hsc_0.025_10_5"
## [1] "hsc_output_bnpr_complete/hsc_0.025_10_6"
## [1] "hsc_output_bnpr_complete/hsc_0.025_10_7"
## [1] "hsc_output_bnpr_complete/hsc_0.025_10_8"
## [1] "hsc_output_bnpr_complete/hsc_0.025_10_9"
## [1] "hsc_output_bnpr_complete/hsc_0.03_6_1"
## [1] "hsc_output_bnpr_complete/hsc_0.03_6_10"
## [1] "hsc_output_bnpr_complete/hsc_0.03_6_11"
## [1] "hsc_output_bnpr_complete/hsc_0.03_6_12"
## [1] "hsc_output_bnpr_complete/hsc_0.03_6_13"
## [1] "hsc_output_bnpr_complete/hsc_0.03_6_14"
## [1] "hsc_output_bnpr_complete/hsc_0.03_6_15"
## [1] "hsc_output_bnpr_complete/hsc_0.03_6_16"
## [1] "hsc_output_bnpr_complete/hsc_0.03_6_17"
## [1] "hsc_output_bnpr_complete/hsc_0.03_6_18"
## [1] "hsc_output_bnpr_complete/hsc_0.03_6_19"
## [1] "hsc_output_bnpr_complete/hsc_0.03_6_2"
## [1] "hsc_output_bnpr_complete/hsc_0.03_6_20"
## [1] "hsc_output_bnpr_complete/hsc_0.03_6_3"
## [1] "hsc_output_bnpr_complete/hsc_0.03_6_4"
## [1] "hsc_output_bnpr_complete/hsc_0.03_6_5"
## [1] "hsc_output_bnpr_complete/hsc_0.03_6_6"
## [1] "hsc_output_bnpr_complete/hsc_0.03_6_7"
## [1] "hsc_output_bnpr_complete/hsc_0.03_6_8"
## [1] "hsc_output_bnpr_complete/hsc_0.03_6_9"

1.6 Calculating skyline trajectories for each clade

all_estimates_skyline <- list()
names_done <- names(all_estimates_skyline)
i <- 1
par(mfrow = c(4,5),mar=c(1,2,2,1))
for (obj_name in names(all_estimates)) {
  print(obj_name)
  if (!(obj_name %in% names_done)) {
    obj <- all_estimates[[obj_name]]
    estimates <- list()
    for (child_obj in obj) {
      Tr <- child_obj$tree %>%
        keep.tip(child_obj$clade) %>%
        multi2di()
      sk <- skyline(Tr,epsilon = 0.02)
      estimates[[length(estimates)+1]] <- list(skyline = sk)
    }
    estimates <- estimates %>% 
      Filter(f = function(x) !is.null(x))
    all_estimates_skyline[[obj_name]] <- estimates
  }
  gc()
}
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_1"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_10"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_11"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_12"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_13"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_14"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_15"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_16"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_17"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_18"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_19"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_2"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_20"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_3"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_4"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_5"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_6"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_7"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_8"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_9"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_1"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_10"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_11"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_12"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_13"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_14"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_15"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_16"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_17"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_18"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_19"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_2"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_20"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_3"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_4"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_5"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_6"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_7"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_8"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_9"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_1"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_10"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_11"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_12"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_13"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_14"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_15"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_16"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_17"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_18"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_19"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_2"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_20"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_3"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_4"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_5"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_6"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_7"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_8"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_9"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_1"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_10"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_11"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_12"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_13"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_14"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_15"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_16"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_17"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_18"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_19"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_2"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_20"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_3"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_4"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_5"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_6"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_7"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_8"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_9"
## [1] "hsc_output_bnpr_complete/hsc_0.025_10_1"
## [1] "hsc_output_bnpr_complete/hsc_0.025_10_10"
## [1] "hsc_output_bnpr_complete/hsc_0.025_10_11"
## [1] "hsc_output_bnpr_complete/hsc_0.025_10_12"
## [1] "hsc_output_bnpr_complete/hsc_0.025_10_13"
## [1] "hsc_output_bnpr_complete/hsc_0.025_10_14"
## [1] "hsc_output_bnpr_complete/hsc_0.025_10_15"
## [1] "hsc_output_bnpr_complete/hsc_0.025_10_16"
## [1] "hsc_output_bnpr_complete/hsc_0.025_10_17"
## [1] "hsc_output_bnpr_complete/hsc_0.025_10_18"
## [1] "hsc_output_bnpr_complete/hsc_0.025_10_19"
## [1] "hsc_output_bnpr_complete/hsc_0.025_10_2"
## [1] "hsc_output_bnpr_complete/hsc_0.025_10_20"
## [1] "hsc_output_bnpr_complete/hsc_0.025_10_3"
## [1] "hsc_output_bnpr_complete/hsc_0.025_10_4"
## [1] "hsc_output_bnpr_complete/hsc_0.025_10_5"
## [1] "hsc_output_bnpr_complete/hsc_0.025_10_6"
## [1] "hsc_output_bnpr_complete/hsc_0.025_10_7"
## [1] "hsc_output_bnpr_complete/hsc_0.025_10_8"
## [1] "hsc_output_bnpr_complete/hsc_0.025_10_9"
## [1] "hsc_output_bnpr_complete/hsc_0.03_6_1"
## [1] "hsc_output_bnpr_complete/hsc_0.03_6_10"
## [1] "hsc_output_bnpr_complete/hsc_0.03_6_11"
## [1] "hsc_output_bnpr_complete/hsc_0.03_6_12"
## [1] "hsc_output_bnpr_complete/hsc_0.03_6_13"
## [1] "hsc_output_bnpr_complete/hsc_0.03_6_14"
## [1] "hsc_output_bnpr_complete/hsc_0.03_6_15"
## [1] "hsc_output_bnpr_complete/hsc_0.03_6_16"
## [1] "hsc_output_bnpr_complete/hsc_0.03_6_17"
## [1] "hsc_output_bnpr_complete/hsc_0.03_6_18"
## [1] "hsc_output_bnpr_complete/hsc_0.03_6_19"
## [1] "hsc_output_bnpr_complete/hsc_0.03_6_2"
## [1] "hsc_output_bnpr_complete/hsc_0.03_6_20"
## [1] "hsc_output_bnpr_complete/hsc_0.03_6_3"
## [1] "hsc_output_bnpr_complete/hsc_0.03_6_4"
## [1] "hsc_output_bnpr_complete/hsc_0.03_6_5"
## [1] "hsc_output_bnpr_complete/hsc_0.03_6_6"
## [1] "hsc_output_bnpr_complete/hsc_0.03_6_7"
## [1] "hsc_output_bnpr_complete/hsc_0.03_6_8"
## [1] "hsc_output_bnpr_complete/hsc_0.03_6_9"

1.7 Comparing BNPR, mcmc.popsize and skyline trajectories

1.7.1 For clades with 10 or fewer tips

# fewer than 10 tips per clade
par(mfrow = c(4,5),mar = c(1,1,1,1))
for (name in names(all_estimates)) {
  obj <- all_estimates[[name]]
  obj_mcmc <- all_estimates_mcmc[[name]]
  obj_skyl <- all_estimates_skyline[[name]]
  
  if (length(obj) > 0) {
    for (i in 1:length(obj)) {
      if (length(obj[[i]]$clade) <= 10) {
        bnpr_traj <- obj[[i]]$bnpr
        mcmc_traj <- obj_mcmc[[i]]$pop_size_eps
        skyl_traj <- obj_skyl[[i]]$skyline
        
        plot_BNPR(bnpr_traj)
        try(lines(extract.popsize(mcmc_traj),col = "red"))
        lines(skyl_traj,col = "green")
      }
    }
  }
}

## Warning in plot.window(...): nonfinite axis limits [GScale(-inf,52.9555,2, .);
## log=1]

## Error in prep$pos[index.right] : subscript out of bounds

1.7.2 For clades with 10-50 tips

# 10-50 tips
par(mfrow = c(4,5),mar = c(1,1,1,1))
for (name in names(all_estimates)) {
  obj <- all_estimates[[name]]
  obj_mcmc <- all_estimates_mcmc[[name]]
  obj_skyl <- all_estimates_skyline[[name]]
  
  if (length(obj) > 0) {
    for (i in 1:length(obj)) {
      if (length(obj[[i]]$clade) < 50 & length(obj[[i]]$clade) > 10) {
        bnpr_traj <- obj[[i]]$bnpr
        mcmc_traj <- obj_mcmc[[i]]$pop_size_eps
        skyl_traj <- obj_skyl[[i]]$skyline
        
        plot_BNPR(bnpr_traj)
        try(lines(extract.popsize(mcmc_traj),col = "red"))
        lines(skyl_traj,col = "green")
      }
    }
  }
}

## Error in prep$pos[index.right] : subscript out of bounds

## Error in prep$pos[index.right] : subscript out of bounds

1.7.3 For clades with 50+ tips

# 50-100 clades
par(mfrow = c(4,5),mar = c(1,1,1,1))
for (name in names(all_estimates)) {
  obj <- all_estimates[[name]]
  obj_mcmc <- all_estimates_mcmc[[name]]
  obj_skyl <- all_estimates_skyline[[name]]
  
  if (length(obj) > 0) {
    for (i in 1:length(obj)) {
      if (length(obj[[i]]$clade) >= 50) {
        bnpr_traj <- obj[[i]]$bnpr
        mcmc_traj <- obj_mcmc[[i]]$pop_size_eps
        skyl_traj <- obj_skyl[[i]]$skyline
        
        plot_BNPR(bnpr_traj)
        try(lines(extract.popsize(mcmc_traj),col = "red"))
        # extracting population size fails for a few cases
        lines(skyl_traj,col = "green")
      }
    }
  }
}
## Error in prep$pos[index.right] : subscript out of bounds

1.7.4 Comparing fits derived from BNPR, mcmc.popsize and skyline

There is quite good agreement between log-linear trajectories fitted to skyline estimation and BNPR. This does not hold as well when comparing mcmc.popsize and BNPR - probably due to the initial dip in population size, the EPS estimates derived from mcmc.popsize lead to a slight underestimation of log-linear growth trajectories. As for early/late growth rate comparisons we observe little agreement between BNPR and mcmc.popsize or skyline estimation.

linear_estimates <- list()
early_late_estimates <- list()
idx <- 1
for (x in names(all_estimates)) {
  obj <- all_estimates[[x]]
  obj_mcmc <- all_estimates_mcmc[[x]]
  obj_skyl <- all_estimates_skyline[[x]]
  linear_estimates[[x]] <- list()
  early_late_estimates[[x]] <- list()
  if (length(obj) > 0){
    for (i in 1:length(obj)) {
      try(
        {
          bnpr_data <- data.frame(
            X = (1 - obj[[i]]$bnpr$summary$time)*800,
            Y = log(obj[[i]]$bnpr$summary$quant0.5))
          ps <- extract.popsize(obj_mcmc[[i]]$pop_size_eps)
          X_ <- seq(min(obj_skyl[[i]]$skyline$time),max(obj_skyl[[i]]$skyline$time),length.out = 100)
          sk_Y <- obj_skyl[[i]]$skyline$population.size[as.numeric(cut(X_,c(0,obj_skyl[[i]]$skyline$time)))]
          skyl_data <- data.frame(
            X = (1 - X_)*800,
            Y = log(sk_Y)
            ) %>% 
            na.omit
          mcmc_data <- data.frame(
            X = (1 - ps[,1])*800,
            Y = log(ps[,3])) %>%
            na.omit
          linear_estimates[[x]][[i]] <- list(
            id = idx,
            bnpr = lm(Y ~ X,data = bnpr_data),
            skyl = lm(Y ~ X,data = skyl_data),
            mcmc = lm(Y ~ X,data = mcmc_data)
          )
          early_late_estimates[[x]][[i]] <- list(
            id = idx,
            bnpr = list(
              late = lm(Y ~ X,data = bnpr_data[1:20,]),
              early = lm(Y ~ X,data = bnpr_data[nrow(bnpr_data) - c(19:0),])),
            skyl = list(
              late = lm(Y ~ X,data = skyl_data[1:20,]),
              early = lm(Y ~ X,data = skyl_data[nrow(skyl_data) - c(19:0),])),
            mcmc = list(
              late = lm(Y ~ X,data = mcmc_data[1:40,]),
              early = lm(Y ~ X,data = mcmc_data[nrow(mcmc_data) - c(39:0),]))
            )
          }
      )
    }
  }
  idx <- idx + 1
}
## Error in prep$pos[index.right] : subscript out of bounds
## Error in prep$pos[index.right] : subscript out of bounds
## Error in prep$pos[index.right] : subscript out of bounds
## Error in prep$pos[index.right] : subscript out of bounds
linear_estimates_bnpr_mcmc_skyl <- linear_estimates %>% 
    lapply(function(x) do.call(rbind,
                               lapply(x, function(y) c(y$id,
                                                       y$bnpr$coefficient[2],
                                                       y$mcmc$coefficient[2],
                                                       y$skyl$coefficient[2])))) %>%
  do.call(what = rbind) %>%
  as.data.frame()
colnames(linear_estimates_bnpr_mcmc_skyl) <- c("id","BNPR","mcmc.popsize","Skyline")
linear_estimates_bnpr_mcmc_skyl$id <- names(all_estimates)[linear_estimates_bnpr_mcmc_skyl$id]
linear_estimates_bnpr_mcmc_skyl$fitness <- linear_estimates_bnpr_mcmc_skyl$id %>%
  str_match("[0-9.]+") %>% 
  as.numeric

early_estimates_bnpr_mcmc_skyl <- early_late_estimates %>% 
    lapply(function(x) do.call(rbind,
                               lapply(x, function(y) c(y$id,
                                                       y$bnpr$early$coefficient[2],
                                                       y$mcmc$early$coefficient[2],
                                                       y$skyl$early$coefficient[2])))) %>%
  do.call(what = rbind) %>%
  as.data.frame()
colnames(early_estimates_bnpr_mcmc_skyl) <- c("id","BNPR","mcmc.popsize","Skyline")
early_estimates_bnpr_mcmc_skyl$id <- names(all_estimates)[early_estimates_bnpr_mcmc_skyl$id]
early_estimates_bnpr_mcmc_skyl$fitness <- early_estimates_bnpr_mcmc_skyl$id %>%
  str_match("[0-9.]+") %>% 
  as.numeric

late_estimates_bnpr_mcmc_skyl <- early_late_estimates %>% 
  Filter(f = function(f) !is.null(x)) %>% 
  lapply(function(x) do.call(rbind,
                             lapply(x, function(y) c(y$id,
                                                     y$bnpr$late$coefficient[2],
                                                     ifelse(!is.null(y$bnpr$late),summary(y$bnpr$late)$coefficients[2,2],NA),
                                                     y$mcmc$late$coefficient[2],
                                                     y$skyl$late$coefficient[2])))) %>%
  do.call(what = rbind) %>%
  as.data.frame()
colnames(late_estimates_bnpr_mcmc_skyl) <- c("id","BNPR","BNPR_se","mcmc.popsize","Skyline")
late_estimates_bnpr_mcmc_skyl$id <- names(all_estimates)[late_estimates_bnpr_mcmc_skyl$id]
late_estimates_bnpr_mcmc_skyl$fitness <- late_estimates_bnpr_mcmc_skyl$id %>%
  str_match("[0-9.]+") %>% 
  as.numeric

A <- linear_estimates_bnpr_mcmc_skyl %>%
  ggplot() +
  geom_point(aes(x = Skyline,y = BNPR),size = 0.5) + 
  geom_abline(slope = 1,linetype = 2,size = 0.25) +
  theme_gerstung(base_size = 6) + 
  coord_cartesian(xlim = c(-0.1,0.1),
                  ylim = c(-0.1,0.1))

B <- early_estimates_bnpr_mcmc_skyl %>%
  ggplot() +
  geom_point(aes(x = Skyline,y = BNPR),size = 0.5) + 
  geom_abline(slope = 1,linetype = 2,size = 0.25) +
  theme_gerstung(base_size = 6) + 
  coord_cartesian(xlim = c(-0.1,0.1),
                  ylim = c(-0.1,0.1))

C <- late_estimates_bnpr_mcmc_skyl %>%
  ggplot() +
  geom_point(aes(x = Skyline,y = BNPR),size = 0.5) + 
  geom_abline(slope = 1,linetype = 2,size = 0.25) +
  theme_gerstung(base_size = 6) + 
  coord_cartesian(xlim = c(-0.1,0.1),
                  ylim = c(-0.1,0.1))

D <- linear_estimates_bnpr_mcmc_skyl %>%
  ggplot() +
  geom_point(aes(x = mcmc.popsize,y = BNPR),size = 0.5) + 
  geom_abline(slope = 1,linetype = 2,size = 0.25) +
  theme_gerstung(base_size = 6) + 
  coord_cartesian(xlim = c(-0.1,0.1),
                  ylim = c(-0.1,0.1))

E <- early_estimates_bnpr_mcmc_skyl %>%
  ggplot() +
  geom_point(aes(x = mcmc.popsize,y = BNPR),size = 0.5) + 
  geom_abline(slope = 1,linetype = 2,size = 0.25) +
  theme_gerstung(base_size = 6) + 
  coord_cartesian(xlim = c(-0.1,0.1),
                  ylim = c(-0.1,0.1))

FF <- late_estimates_bnpr_mcmc_skyl %>%
  ggplot() +
  geom_point(aes(x = mcmc.popsize,y = BNPR),size = 0.5) + 
  geom_abline(slope = 1,linetype = 2,size = 0.25) +
  theme_gerstung(base_size = 6) + 
  coord_cartesian(xlim = c(-0.1,0.1),
                  ylim = c(-0.1,0.1))

plot_grid(A,B,C,D,E,FF,nrow = 2)
## Warning: Removed 4 rows containing missing values (geom_point).

## Warning: Removed 4 rows containing missing values (geom_point).

data.frame(
  BNPR = c(cor(linear_estimates_bnpr_mcmc_skyl$fitness,linear_estimates_bnpr_mcmc_skyl$BNPR),
           cor(early_estimates_bnpr_mcmc_skyl$fitness,early_estimates_bnpr_mcmc_skyl$BNPR))^2,
  Skyline = c(cor(linear_estimates_bnpr_mcmc_skyl$fitness,linear_estimates_bnpr_mcmc_skyl$Skyline),
              cor(early_estimates_bnpr_mcmc_skyl$fitness,early_estimates_bnpr_mcmc_skyl$Skyline))^2,
  mcmc.popsize = c(cor(linear_estimates_bnpr_mcmc_skyl$fitness,linear_estimates_bnpr_mcmc_skyl$mcmc.popsize),
                   cor(early_estimates_bnpr_mcmc_skyl$fitness,early_estimates_bnpr_mcmc_skyl$mcmc.popsize))^2,
  fit = c("Log-linear","Early log-linear"))

1.8 Timing clones from trees

for (x in names(all_estimates)) {
  estimate <- all_estimates[[x]]
  if (length(estimate) > 0) {
    for (y in 1:length(estimate)) {
      m <- MRCA(estimate[[y]]$tree,estimate[[y]]$clade)
      estimate[[y]]$mutation_timing <- time_mutation(
        estimate[[y]]$tree,m)
      all_estimates[[x]][[y]]$mutation_timing <- estimate[[y]]$mutation_timing
    }
  }
}

2 Modelling BNPR trajectories

Here we model the EPS trajectories using different types of models, namely:

  • A log-linear model
  • A scaled and shifted sigmoidal curve
  • A log-linear model with one changepoint (captures the earlier and later phases of growth)

We also fit the same models to the original WF trajectories one gets for each individual driver. By doing so, we can more accurately verify whether we are able to recapitulate the clonal growth as it is happening with the BNPR trajectories.

2.1 Fitting different models to BNPR trajectory

all_fits <- list()
par(mfrow = c(4,5),mar = c(3,1,1,1))
for (obj_name in names(all_estimates)) {
  bnpr_estimates <- all_estimates[[obj_name]] %>%
    Filter(f = function(x) class(x$bnpr) != "try-error")
  new_bnpr_estimates <- list()
  for (x in bnpr_estimates) {
    bnpr_estimate <- x$bnpr
    Y <- log(bnpr_estimate$summary$mean)
    Y_ <- Y - min(Y,na.rm = T) + 1e-8
    Y025 <- log(bnpr_estimate$summary$quant0.025)
    Y975 <- log(bnpr_estimate$summary$quant0.975)
    X <- (1-bnpr_estimate$summary$time) * 800
    tmp_df <- data.frame(
      X = X,Y = Y,Y_ = Y_,
      Y025 = Y025,Y975 = Y975,
      W = (Y975 - Y025)^2/16
    ) %>% 
      na.omit()
    tmp_df <- tmp_df[!is.infinite(tmp_df$Y),]
    tmp_df$Y_exp <- exp(tmp_df$Y)
    
    tmp_df$W[is.infinite(tmp_df$W)] <- max(tmp_df$W[!is.infinite(tmp_df$W)])
    linear_estimate <- lm(Y ~ X,w = 1 / tmp_df$W,data = tmp_df)
    m <- linear_estimate$coefficients[2]
    linear_estimate_no_weights <- lm(Y ~ X,data = tmp_df)
    
    if (nrow(tmp_df) >= 10 & max(tmp_df$Y_exp) < 1e100) {
      non_linear_estimate <- nlsLM(
        Y_exp ~ SSlogis(X, Asym, b2, b3),
        start = list(Asym = max(tmp_df$Y_exp),b2 = mean(tmp_df$X),b3 = 10),
        control = nls.control(maxiter = 1000,warnOnly = T),
        data = tmp_df,
        algorithm = "port",
        weights = 1/tmp_df$W)
      x_earliest <- seq(min(tmp_df$X),min(tmp_df$X) + 50,length.out = 20)
      y_earliest <- approx(tmp_df$X,tmp_df$Y,xout = x_earliest)$y
      
      x_latest <- seq(max(tmp_df$X) - 50,max(tmp_df$X),length.out = 20)
      y_latest <- approx(tmp_df$X,tmp_df$Y,xout = x_latest)$y
      
      latest <- lm(y_latest ~ x_latest)
      earliest <- lm(y_earliest ~ x_earliest)

      if (class(non_linear_estimate) == "try-error") {
        non_linear_estimate <- try(nlsLM(
          Y_exp ~ SSlogis(X, Asym, b2, b3),
          start = list(Asym = max(tmp_df$Y_exp),b2 = max(tmp_df$X),b3 = 10),
          control = nls.control(maxiter = 1000,warnOnly = T),
          data = tmp_df,
          algorithm = "port",
          weights = 1/tmp_df$W))
      }
    } else {
      non_linear_estimate <- NULL
      earliest <- NULL
      latest <- NULL
    }
    
    opt_fn <- function(par) {
      se <- (tmp_df$Y - change_point(tmp_df$X,par[1],par[2],par[3],par[4]))^2
      return(mean(se / tmp_df$W))
    }
    change_point_regression <- optim(
      par = c(linear_estimate$coefficients[1],
              linear_estimate$coefficients[2],
              0,
              mean(X)),
      method = "L-BFGS-B",
      lower = c(NA,0,NA,0),
      upper = c(NA,NA,NA,800),
      fn = opt_fn)
    x$linear_estimate <- linear_estimate
    x$linear_estimate_no_weights <- linear_estimate_no_weights
    x$change_point_regression <- change_point_regression
    x$non_linear_estimate <- non_linear_estimate
    x$data <- tmp_df
    x$earliest_growth <- earliest
    x$latest_growth <- latest
    new_bnpr_estimates[[length(new_bnpr_estimates) + 1]] <- x
  }
  
  bnpr_estimates <- new_bnpr_estimates %>%
    Filter(f = function(x) !is.null(x))
  
  all_estimates[[obj_name]] <- bnpr_estimates
  for (bnpr in bnpr_estimates) {
    cpr <- bnpr$change_point_regression
    all_fits[[length(all_fits)+1]] <- data.frame(
      x = bnpr$data$X,y = exp(bnpr$data$Y),
      obj_name,n = length(all_fits)+1,
      clade_size = length(bnpr$clade))

    if ((cpr$par[2] + cpr$par[3]) < 0) {
        # plot_BNPR(bnpr$bnpr,main = sprintf("%s",length(bnpr$clade)))
        # plot(bnpr$data$X,exp(bnpr$data$Y),col = "black")
        # if (!is.null(bnpr$non_linear_estimate)) {
        #   lines(bnpr$data$X,predict(
        #     bnpr$non_linear_estimate,newdata = data.frame(X = bnpr$data$X)),
        #     col = "red")
        # }
        # lines(1-bnpr$data$X/800,
        #       exp(change_point(bnpr$data$X,cpr$par[1],cpr$par[2],cpr$par[3],cpr$par[4])),
        #       col="red")
      }
    }
}

2.2 Fitting different models to driver trajectories from the original WF simulations

all_driver_fits <- list()
par(mfrow = c(4,5),mar = c(1.5,2.5,2.5,1))
for (x in names(all_driver_trajectories)) {
  driver_trajectories <- all_driver_trajectories[[x]]
  driver_trajectories <- driver_trajectories %>%
    arrange(MutID,Gen) %>%
    group_by(MutID) %>%
    mutate(Break = c(T,diff(Gen) != 20)) %>%
    mutate(Component = cumsum(Break)) %>%
    mutate(MutID_C = sprintf("%s_%s",MutID,Component))
  drivers <- unique(driver_trajectories$MutID_C)
  all_driver_fits[[x]] <- list()
  for (driver in drivers) {
    tmp_df <- driver_trajectories %>%
      subset(MutID_C == driver)
    driver <- tmp_df$MutID[1]
    has_last_gen <- any(tmp_df$Gen == 800)
    large_at_last_gen <- ifelse(
      has_last_gen,
      tmp_df$Count[tmp_df$Gen == 800] > 200,
      F)
    
    if (nrow(tmp_df) >= 5 & has_last_gen & large_at_last_gen) {
      opt_fn <- function(par) {
        se <- (log(tmp_df$Count) - change_point(tmp_df$Gen,par[1],par[2],par[3],par[4]))^2
        return(mean(se))
      }
      
      x_earliest <- seq(min(tmp_df$Gen),min(tmp_df$Gen) + 50,length.out = 20)
      y_earliest <- approx(tmp_df$Gen,tmp_df$Count,xout = x_earliest)$y
      
      x_latest <- seq(max(tmp_df$Gen) - 50,max(tmp_df$Gen),length.out = 20)
      y_latest <- approx(tmp_df$Gen,tmp_df$Count,xout = x_latest)$y
      
      latest <- lm(y_latest ~ x_latest)
      earliest <- lm(y_earliest ~ x_earliest)

      linear_sol <- lm(log(Count) ~ Gen,data = tmp_df)
      change_point_regression <- optim(
        par = c(linear_sol$coefficients[1],linear_sol$coefficients[2],0,mean(tmp_df$Gen)),
        method = "L-BFGS-B",
        lower = c(NA,0,NA,0),
        upper = c(NA,NA,NA,800),
        fn = opt_fn)
      non_linear_estimate <- nlsLM(
        Count ~ SSlogis(Gen, Asym, b2, b3),
        start = list(Asym = max(tmp_df$Count),b2 = mean(tmp_df$Gen),b3 = 10),
        control = nls.control(maxiter = 1000,warnOnly = T),
        data = tmp_df,
        upper = c(Asym = 2e5,b2 = 3000,b3 = 1e5),
        algorithm = "port")
      last_vaf <- tmp_df$Count[tmp_df$Gen == 800]/2e5
      all_driver_fits[[x]][[as.character(driver)]] <- list(
        data = tmp_df,
        non_linear_estimate = non_linear_estimate,
        change_point_regression = change_point_regression,
        earliest_driver = earliest,
        latest_driver = latest,
        last_vaf = last_vaf,
        gen_at_onset = min(tmp_df$Gen))
    }
  }
}

2.3 Extracting coefficients from BNPR fits

all_bnpr_fits <- names(all_estimates) %>%
  lapply(
    function(x) {
      if (length(all_estimates[[x]]) > 0) {
        tree_eps <- unlist(all_estimates_trees[[x]]$summary[1,c(4,2,6)])
        lapply(1:length(all_estimates[[x]]),function(y) {
          y <- as.numeric(y)
          if (!is.null(all_estimates[[x]][[y]]$non_linear_estimate)) {
            pars <- all_estimates[[x]][[y]]$non_linear_estimate$m$getAllPars()
            tao <- all_estimates[[x]][[y]]$mutation_timing * 800
            cp <- all_estimates[[x]][[y]]$change_point_regression$par
            pop_size_at_tao_cp <- exp((800 - tao)*cp[2])
            earliest <- all_estimates[[x]][[y]]$earliest_growth$coefficients[2]
            latest <- all_estimates[[x]][[y]]$latest_growth$coefficients[2]
            O <- data.frame(
              l = all_estimates[[x]][[y]]$linear_estimate$coefficients[2],
              nl = 1 / pars[3],
              nl_asym = pars[1],
              nl_xmid = pars[2],
              l_nw = all_estimates[[x]][[y]]$linear_estimate_no_weights$coefficients[2],
              cp_1 = cp[2],
              cp_2 = cp[3],
              earliest = earliest,
              latest = latest,
              changepoint_bnpr = cp[4],
              time_at_onset_low = tao[1],
              time_at_onset_high = tao[2],
              w_sum = mean(all_estimates[[x]][[y]]$data$W,na.rm=T),
              pop_at_detection_low = pop_size_at_tao_cp[2],
              pop_at_detection_high = pop_size_at_tao_cp[1],
              tree_eps_low = tree_eps[1],
              tree_eps_mean = tree_eps[2],
              tree_eps_high = tree_eps[3],
              point_of_saturation = which(
                c(
                  predict(
                    all_estimates[[x]][[y]]$non_linear_estimate,
                    newdata = data.frame(X = seq(1,800)))) > (0.9 * pars[1])) %>% min,
              name = x,
              clade_size = length(all_estimates[[x]][[y]]$clade),
              driver = all_estimates[[x]][[y]]$driver,
              clade = y)
            return(O)
          }
        }) %>%
          do.call(what = rbind)
      } else {
        return(NULL)
      }
    }
    ) %>%
  do.call(what = rbind) %>%
  mutate(fitness = as.numeric(gsub('_',"",str_match(name,"_[0-9.]+_")))) %>%
  group_by(name) %>%
  mutate(N = length(unique(clade))) %>%
  mutate(cp_late = cp_1 + cp_2)
## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

2.4 Extracting coefficients from driver fits from the original WF simulations

all_driver_fits_df <- names(all_driver_fits) %>% 
  lapply(function(x) {
    if (length(all_driver_fits[[x]]) > 0) {
      lapply(names(all_driver_fits[[x]]),function(y) {
        pars <- all_driver_fits[[x]][[y]]$non_linear_estimate$m$getAllPars()
        earliest <- all_driver_fits[[x]][[y]]$earliest_driver$coefficients[2]
        latest <- all_driver_fits[[x]][[y]]$latest_driver$coefficients[2]
        
        early_cp <- all_driver_fits[[x]][[y]]$change_point_regression$par[2]
        
        m <- all_driver_fits[[x]][[y]]$data %>%
          subset(Gen == min(Gen))
        v <- m$Count / 2e5
        time <- m$Gen
        m <- -log((1 - v)/v) - early_cp * time
        expected_last_vaf <- 1 / (1 + exp(-(m + early_cp * 800)))
        return(
          data.frame(
            Asym = pars[1],
            xmid = pars[2],
            growth_rate = 1/pars[3],
            early_growth = all_driver_fits[[x]][[y]]$change_point_regression$par[2],
            late_growth = all_driver_fits[[x]][[y]]$change_point_regression$par[3],
            earliest_driver = earliest,
            latest_driver = latest,
            changepoint_wf = all_driver_fits[[x]][[y]]$change_point_regression$par[4],
            point_of_saturation_wf = which(
              c(
                predict(
                  all_driver_fits[[x]][[y]]$non_linear_estimate,
                  newdata = data.frame(Gen = seq(1,800)))) > (0.9 * pars[1])) %>% min,
            name = x,
            driver = y,
            last_vaf = all_driver_fits[[x]][[y]]$last_vaf,
            expected_last_vaf,
            gen_at_onset = all_driver_fits[[x]][[y]]$gen_at_onset
          )
        )
      }) %>%
        do.call(what = rbind)
    }
  }) %>%
  do.call(what = rbind) %>%
  mutate(late_growth = early_growth + late_growth)
## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

## Warning in min(.): no non-missing arguments to min; returning Inf

3 Comparing BNPR trajectory models

3.1 Plotting BNPR trajectories

Here, we plot all of the inferred BNPR trajectories for each clade.

all_fits %>%
  do.call(what = rbind) %>%
  mutate(fitness = str_match(obj_name,"[0-9.]+")) %>% 
  group_by(n) %>%
  mutate(x = x - min(x)) %>% 
  filter(min(y) > 1e-6 & max(y) < 1e8) %>%
  mutate(fitness = factor(
    fitness,
    levels = seq(0.005,0.03,by = 0.005),
    labels = sprintf("s=%s",seq(0.005,0.03,by = 0.005)))) %>% 
  ggplot(aes(x = x,y = y)) + 
  geom_line(aes(group = n),alpha = 0.25,size = 0.25) + 
  facet_wrap(~ fitness) + 
  scale_y_continuous(trans = 'log10',expand = c(0,0),breaks = c(1e-2,1,1e2,1e4,1e6)) +
  scale_x_continuous(expand = c(0,0)) +
  theme_gerstung(base_size = 6) + 
  theme(strip.text = element_text(margin = margin(b = 0.5))) +
  ylab("Effective population size (EPS)") + 
  xlab("Time since first coal. (generations)") + 
  coord_cartesian(ylim = c(1e-2,1e7)) +
  ggsave("figures/simulations/trajectories_BNPR.pdf",height = 1.5,width = 2.3)

3.2 Comparing BNPR and WF trajectories

Here we scale the BNPR trajectories so that they match as closely as possible those obtained from the WF trajectories. A fairly good fit between BNPR and WF trajectories is observable for a large fraction of trajectories.

simulated_trajectories <- list()
bnpr_estimates_ <- list()
for (x in names(all_driver_fits)) {
  if (x %in% names(all_estimates)) {
    for (y in names(all_driver_fits[[x]])) {
      clade_no <- 1
      bnpr_sums <- list()
      largest_clade <- which.max(unlist(lapply(all_estimates[[x]],function(x) length(x$clade))))[1]
      for (estimate in all_estimates[[x]]) {
        if (y == estimate$driver) {
          driver_fit <- all_driver_fits[[x]][[y]]$data
          bnpr_sum <- estimate$bnpr$summary
          W <- (log(bnpr_sum$quant0.975) - log(bnpr_sum$quant0.025))^2/16
          bnpr_sum$time <- (1 - bnpr_sum$time)*800
          if (clade_no == largest_clade) {
            opt_data <- data.frame(
              a = approx(bnpr_sum$time,y = bnpr_sum$mean,xout = driver_fit$Gen)$y,
              b = driver_fit$Count) %>%
              na.omit
            opt_fn <- function(pars) {
              l <- (opt_data$a * pars[1] - opt_data$b)^2
              l <- l
              return(sum(l))
            }
            opt_solution <- optim(c(m = 0.001,b = 1),
                                  opt_fn,
                                  control = list(maxit = 10000),
                                  method = "Nelder-Mead")
            P <- opt_solution$par
          }
          driver_fit$id <- x
          driver_fit$driver <- y
          bnpr_sum$id <- x
          bnpr_sum$driver <- y
          bnpr_sum$clade <- clade_no
          simulated_trajectories[[length(simulated_trajectories)+1]] <- driver_fit
          bnpr_sums[[length(bnpr_sums)+1]] <- bnpr_sum
          clade_no <- clade_no + 1
        }
      }
      for (bnpr_sum in bnpr_sums) {
        bnpr_sum$transformed_y <- bnpr_sum$mean * P[1]
        bnpr_sum$transformed_y_low <- bnpr_sum$quant0.025 * P[1]
        bnpr_sum$transformed_y_high <- bnpr_sum$quant0.975 * P[1]
        bnpr_estimates_[[length(bnpr_estimates_)+1]] <- bnpr_sum
      }
    }
  }
}

simulated_trajectories_df <- do.call(rbind,simulated_trajectories)
bnpr_estimates_df <- do.call(rbind,bnpr_estimates_)
ggplot(data = NULL) +
  geom_hline(yintercept = 2e5,size = 0.25,colour = "goldenrod",alpha = 0.7) +
  geom_line(data = simulated_trajectories_df,
            aes(x = Gen/100,y = Count,colour = "Simulated",
                group = paste(MutID_C,driver)),
            size = 0.25,
            alpha = 0.7) + 
  geom_line(data = bnpr_estimates_df,aes(x = time/100,y = transformed_y,
                                         group = paste(clade,driver),colour = "BNPR estimate"),
            size = 0.25,
            alpha = 0.5) + 
  facet_wrap(~ id,scales = "free") + 
  theme_gerstung(base_size = 6) + 
  theme(strip.text = element_blank(),legend.position = "bottom") + 
  scale_y_continuous(trans = 'log10') + 
  coord_cartesian(ylim = c(NA,2e8),c(0,8)) +
  scale_colour_manual(values = c(Simulated = "grey50",`BNPR estimate` = "orchid"),name = NULL) + 
  xlab("Generation (x100)") + 
  ggsave("figures/simulations/trajectories_wf_bnpr.pdf",height = 6,width = 7)

3.3 Comparing inference of early and late growth between BNPR fits and driver fits from WF simulations

We define here a measure of excessive observed variance in the BNPR trajectory - whenever the average log-EPS variance is larger than 10 we define these trajectories as "high variance" trajectories, noting that they are trajectories which were obtained from fairly small clades (fewer than 10 tips).

bnpr_wf_merge_df <- merge(all_bnpr_fits,all_driver_fits_df,by = c("name","driver")) %>%
  mutate(vaf_at_detection_low = pop_at_detection_low/2e5,
         vaf_at_detection_high = pop_at_detection_high/2e5) %>%
  mutate(vaf_at_detection_low = ifelse(vaf_at_detection_low > 1,1,vaf_at_detection_low),
         vaf_at_detection_high = ifelse(vaf_at_detection_high > 1,1,vaf_at_detection_high)) %>%
  mutate(vaf_contained = last_vaf > vaf_at_detection_low & last_vaf < vaf_at_detection_high) %>%
  mutate(vaf_contained_higher_lower = ifelse(
    vaf_contained == F,
    ifelse(last_vaf > vaf_at_detection_high,"higher","lower"),
    "contained")) %>%
  mutate(col = ifelse(w_sum < 10,"Low variance","High variance")) %>%
  mutate(wf_ratio = late_growth/early_growth,bnpr_ratio = cp_late/cp_1) %>% 
  mutate(bnpr_ratio = ifelse(is.infinite(bnpr_ratio),1,bnpr_ratio),
         wf_ratio = ifelse(is.infinite(wf_ratio),1,wf_ratio))


bnpr_wf_merge_df %>% 
  ggplot(aes(x = w_sum)) +
  geom_density() +
  scale_x_continuous(trans = 'log10') + 
  theme_gerstung(base_size = 6) + 
  xlab("Average variance")

ggplot(bnpr_wf_merge_df) +
  geom_boxplot(aes(x = col,y = clade_size),outlier.size = 0.5) + 
  scale_y_continuous(breaks = seq(0,100,by = 10)) + 
  theme_gerstung(base_size = 6) + 
  xlab("Trajectory type") + 
  ylab("Clade size")

early_v_early_plot <- bnpr_wf_merge_df %>% 
  ggplot(aes(x = early_growth,y = cp_1,colour = col)) + 
  geom_abline(slope = 1,linetype = 2, size = 0.25) +
  geom_point(size = 0.5) + 
  geom_smooth(method = "lm",alpha = 0.5,size = 0.25) +
  theme_gerstung(base_size = 6) + 
  coord_cartesian(xlim = c(0,0.07),ylim = c(0,0.07))  + 
  xlab("WF early growth") + 
  ylab("BNPR early growth") + 
  theme(legend.position = "bottom",
        legend.key.size = unit(0,"cm")) + 
  scale_colour_lancet() + 
  scale_colour_lancet(name = NULL)
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
late_v_late_plot <- bnpr_wf_merge_df %>% 
  ggplot(aes(x = late_growth,y = cp_late,colour = col)) + 
  geom_abline(slope = 1,linetype = 2, size = 0.25) +
  geom_point(size = 0.5) +
  geom_smooth(method = "lm",alpha = 0.5,size = 0.25) +
  theme_gerstung(base_size = 6) +
  coord_cartesian(xlim = c(-0.03,0.04),ylim = c(-0.03,0.04)) + 
  xlab("WF late growth") + 
  ylab("BNPR late growth") + 
  theme(legend.position = "bottom",
        legend.key.size = unit(0,"cm")) + 
  scale_colour_lancet() + 
  scale_colour_lancet(name = NULL)
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
cp_v_cp_plot <- bnpr_wf_merge_df %>% 
  ggplot(aes(x = changepoint_wf,y = changepoint_bnpr,colour = col)) + 
  geom_abline(slope = 1,linetype = 2, size = 0.25) +
  geom_point(size = 0.5) +
  geom_smooth(method = "lm",alpha = 0.5,size = 0.25) +
  theme_gerstung(base_size = 6) +
  coord_cartesian(xlim = c(200,800),ylim = c(200,800)) + 
  xlab("WF change point") + 
  ylab("BNPR change point") + 
  theme(legend.position = "bottom",
        legend.key.size = unit(0,"cm")) + 
  scale_colour_lancet(name = NULL)

plot_grid(early_v_early_plot + theme(legend.position = "none"),
          late_v_late_plot + theme(legend.position = "none"),
          cp_v_cp_plot + theme(legend.position = "none"),
          align = "hv",
          nrow = 1) %>%
  plot_grid(get_legend(early_v_early_plot),rel_heights = c(1,0.05),ncol = 1) + 
  ggsave("figures/simulations/compare_cp_wf_bnpr.pdf",height = 1.5,width = 4.5)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

3.4 Comparing the growth ratio between early and late phases of growth

We observe here that the ratio defined by \(\frac{LateGrowth}{EarlyGrowth}\) for both WF and BNPR independently fits quite closely to the identity, thus proving that we are capable of detecting this reasonably well using the BNPR trajectories alone.

bnpr_wf_merge_df %>% 
  subset(early_growth > 0 & cp_1 > 0) %>% 
  ggplot(aes(x = late_growth/early_growth,y = cp_late/cp_1)) + 
  geom_abline(slope = 1,linetype = 2,size = 0.25) +
  geom_point(size = 0.5,alpha = 0.4) + 
  geom_smooth(method = "lm",colour = "red4",alpha = 0.5,size = 0.25) +
  coord_cartesian(xlim = c(-1,4),ylim = c(-1,4)) + 
  theme_gerstung(base_size = 6) + 
  xlab("WF growth ratio") + 
  ylab("BNPR growth ratio")
## `geom_smooth()` using formula 'y ~ x'

bnpr_wf_merge_df %>%
  lm(formula = bnpr_ratio ~ wf_ratio) %>% 
  summary
## 
## Call:
## lm(formula = bnpr_ratio ~ wf_ratio, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8331 -0.4403 -0.2054  0.1414  5.5404 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.30111    0.07469   4.032 7.30e-05 ***
## wf_ratio     0.84646    0.14537   5.823 1.71e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8575 on 258 degrees of freedom
## Multiple R-squared:  0.1162, Adjusted R-squared:  0.1127 
## F-statistic:  33.9 on 1 and 258 DF,  p-value: 1.714e-08
table(all_driver_fits_df$last_vaf < all_driver_fits_df$expected_last_vaf) / nrow(all_driver_fits_df)
## 
##     FALSE      TRUE 
## 0.3155894 0.6844106

3.5 Comparing the expected and observed late growth

We also want to have a measure of growth deceleration that is more comparable with our inferences from the time series data - based on a sigmoidal fit with a fixed carrying capacity (\(VAF = 0.5\)) that is then extrapolated to its onset of origin. As such, we fit a sigmoidal to the BNPR that assumes a total carrying capacity of \(1\) and that the final EPS corresponds to a proxy for the VAF that is parametrised as \(\hat{VAF} = \frac{Number\ Of\ Tips\ In\ The\ Clade}{Total\ Tips\ In\ The\ Tree}\).

expected_late_growth <- function(vaf,id,clade) {
  clade <- as.numeric(clade)
  if (clade <= length(all_estimates[[id]])) {
    
    tmp_df <- all_estimates[[id]][[clade]]$data %>%
      arrange(X)
    last_eps <- tmp_df$Y_exp[nrow(tmp_df)]
    last_eps <- max(tmp_df$Y_exp)
    tmp_df$normalized_Y <- tmp_df$Y_exp / last_eps * vaf

    nl_fit <- nls(formula = normalized_Y ~ SSlogis(X,1,Xmid,b),
                  start = list(Xmid = mean(tmp_df$X),b = 10),
                  control = nls.control(maxiter = 1000,minFactor = 1/(1024^32),
                                        warnOnly = T),
                  data = tmp_df)
    return(nl_fit$m$getAllPars()[2])
  } else {
    return(NA)
  }
}

bnpr_wf_merge_df$expected_nl <- rep(NA,nrow(bnpr_wf_merge_df))

for (i in 1:nrow(bnpr_wf_merge_df)) {
  sub_bnpr_wf_merge_df <- bnpr_wf_merge_df[i,]
  last_vaf <- sub_bnpr_wf_merge_df$last_vaf
  name <- as.character(sub_bnpr_wf_merge_df$name)
  clade <- sub_bnpr_wf_merge_df$clade
  bnpr_wf_merge_df$expected_nl[i] <- 1/expected_late_growth(last_vaf,name,clade)
}
## Warning in nls(formula = normalized_Y ~ SSlogis(X, 1, Xmid, b), start =
## list(Xmid = mean(tmp_df$X), : singular gradient
bnpr_wf_merge_df %>% 
  subset(col == "Low variance") %>% 
  mutate(expected_late_ratio_wf = late_growth/growth_rate,
         expected_late_ratio = cp_late / expected_nl) %>%
  mutate(expected_late_ratio = ifelse(expected_late_ratio < -1,-1,expected_late_ratio)) %>% 
  ggplot(aes(x = expected_late_ratio_wf < 0.8,y = expected_late_ratio)) +
  geom_jitter(width = 0.3,size = 0.5,alpha = 0.7) +
  geom_boxplot(outlier.alpha = 0) + 
  theme_gerstung(base_size = 6) +
  scale_y_continuous(breaks = c(-1,0,1,2),
                     labels = c("<-1","0","1","2")) + 
  ylab("BNPR late/expected growth ratio") +
  xlab("WF late/expected growth ratio < 0.8")

bnpr_wf_merge_df %>% 
  subset(col == "Low variance") %>% 
  mutate(expected_late_ratio = cp_late / expected_nl) %>% 
  mutate(expected_late_ratio = ifelse(expected_late_ratio < -1,-1,expected_late_ratio)) %>% 
  ggplot(aes(y = expected_late_ratio,x=0)) + 
  geom_jitter(size = 0.5,alpha = 0.5,width = 0.05) + 
  geom_boxplot(size = 0.25,width = 0.1,outlier.size = 0,alpha = 0.8) + 
  theme_gerstung(base_size = 6) + 
  geom_hline(yintercept = 1,linetype = 2) +
  #coord_cartesian(ylim = c(0,5)) + 
  scale_y_continuous(breaks = c(-1,0,1,2),
                     labels = c("<-1","0","1","2")) + 
  ylab("Observed/expected late growth ratio") +
  xlab("") +
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank()) +
  ggsave("figures/simulations/expected_late_growth_ratio.pdf",height = 1.5,width = 0.7)

3.6 Comparing inference of sigmoidal fit between BNPR fits and driver fits from WF simulations

As mentioned, we also fit sigmoidal curves to each trajectory and here we assess whether they can be used for inference.

bnpr_wf_merge_sigmoid_df <- bnpr_wf_merge_df
asymptote_graph <- bnpr_wf_merge_sigmoid_df %>%
  ggplot(aes(x = Asym,y = nl_asym,colour = col)) + 
  geom_point(size = 0.5) + 
  scale_y_continuous(trans = 'log10') +
  scale_x_continuous(trans = 'log10') +
  theme_gerstung(base_size = 6) +
  xlab("Simulated asymptote") +
  ylab("BNPR asymptote") + 
  theme(legend.position = "none") + 
  coord_cartesian(ylim = c(NA,1e6)) + 
  scale_color_lancet(name = NULL)

midpoint_graph <- bnpr_wf_merge_sigmoid_df %>%
  ggplot(aes(x = xmid,y = nl_xmid,colour = col)) + 
  geom_abline(slope = 1,size = 0.25,linetype = 2) +
  geom_smooth(method = "lm",size = 0.25,alpha = 0.25) +
  geom_point(size = 0.5) + 
  coord_cartesian(xlim = c(0,2000),ylim = c(0,2000)) + 
  theme_gerstung(base_size = 6) +
  xlab("Simulated midpoint") +
  ylab("BNPR midpoint") + 
  theme(legend.position = "none") + 
  scale_color_lancet(name = NULL)

point_of_saturation_plot <- bnpr_wf_merge_sigmoid_df %>%
  subset(!is.infinite(point_of_saturation) & !is.infinite(point_of_saturation_wf)) %>%
  ggplot(aes(x = point_of_saturation,y = point_of_saturation_wf,colour = col)) + 
  geom_abline(slope = 1,size = 0.25,linetype = 2) +
  geom_smooth(method = "lm",size = 0.25,alpha = 0.25) +
  geom_point(size = 0.5) +
  coord_cartesian(xlim = c(200,800),ylim = c(200,800)) + 
  theme_gerstung(base_size = 6) + 
  xlab("Simulated generation at saturation") +
  ylab("BNPR generation at saturation") + 
  theme(legend.position = "none") + 
  scale_color_lancet(name = NULL)

fitness_plot <- bnpr_wf_merge_sigmoid_df %>%
  ggplot(aes(x = growth_rate,y = nl,colour = col)) +
  geom_abline(slope = 1,size = 0.25,linetype = 2) +
  geom_smooth(method = "lm",size = 0.25,alpha = 0.25) +
  geom_point(size = 0.5) +
  coord_cartesian() + 
  theme_gerstung(base_size = 6) + 
  xlab("Fitness inferred from simulation") +
  ylab("Fitness inferred from BNPR") +
  theme(legend.key.width = unit(0.1,"cm"),
        legend.key.height = unit(0.3,"cm")) + 
  scale_color_lancet(name = NULL) + 
  coord_cartesian(ylim = c(NA,0.05),xlim = c(NA,NA))
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
plot_grid(
  asymptote_graph,midpoint_graph,point_of_saturation_plot,
  fitness_plot + theme(legend.position = "none"),
  nrow = 2,align = "hv",
  rel_heights = c(1,1)
) %>% 
  plot_grid(plot_grid(ggplot() + theme_nothing(),
                      get_legend(fitness_plot + theme(legend.position = "bottom")),
                      ggplot() + theme_nothing(),nrow = 1),
            rel_heights = c(1,0.1),ncol = 1) + 
  ggsave("figures/simulations/compare_wf_bnpr.pdf",height = 3.6,width = 4.8)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

truth <- is.finite(bnpr_wf_merge_df$point_of_saturation_wf)
pred <- is.finite(bnpr_wf_merge_df$point_of_saturation)
acc <- sum(truth == pred) / length(truth)
prec <- sum(truth == pred & truth == 1) / (sum(pred == 1))
recall <- sum(truth == pred & truth == 1) / (sum(truth == 1))

truth_low_var <- is.finite(
  subset(bnpr_wf_merge_df,col == "Low variance")$point_of_saturation_wf)
pred_low_var <- is.finite(
  subset(bnpr_wf_merge_df,col == "Low variance")$point_of_saturation)
acc_low_var <- sum(truth_low_var == pred_low_var) / length(truth_low_var)
prec_low_var <- sum(truth_low_var == pred_low_var & truth_low_var == 1)/(sum(pred_low_var == 1))
recall_low_var <- sum(truth_low_var == pred_low_var & truth_low_var == 1)/(sum(truth_low_var == 1))

data.frame(
  Accuracy = c(acc,acc_low_var),
  Precision = c(prec,prec_low_var),
  Recall = c(recall,recall_low_var),
  Data = c("All trajectories","Low variance trajectories")
)

3.7 Benchmarking different models for BNPR fits

Finally we compare all of the methods specified above and make statements regarding their usability in predicting the true fitness of each clade. We note that log-linear fits have a tendency to underestimate the growth rate of clones with large fitness values, whereas sigmoidal curves have a tendency to do the opposite - overestimate the growth rate of clones with small fitness values. The early growth rate stands as the most adequate solution for our problem, with \(R^2 > 0.3\) for fits with low variance. We must note that we still observe an \(RMSE = 0.01\) which would, assuming 10 generations per year, correspond to a variation of \(\pm 10%\). As such, these inferences should account for this considerable level of uncertainty that is associated with each estimate.

all_bnpr_fits <- all_bnpr_fits %>% 
  mutate(col = ifelse(w_sum < 10,"Low variance","High variance")) 

limits <- c(0,0.05)
plot_list <- list()
metric_list <- list()

for (col_name in c("l","cp_1","cp_late","nl")) {
  sub_data <- all_bnpr_fits %>%
    ungroup() %>%
    select(fitness,R = col_name) %>%
    na.omit()
  sub_data_low <- all_bnpr_fits %>%
    subset(col == "Low variance") %>%
    ungroup() %>%
    select(fitness,R = col_name) %>%
    na.omit() 
  
  plot_list[[col_name]] <- all_bnpr_fits %>%
    select(fitness,R = col_name,col = col) %>% 
    ggplot(aes(x = fitness,y = R,group = paste(fitness),colour = col)) +
    geom_jitter(width = 0.001,size = 0.5) +
    geom_boxplot(outlier.alpha = 0,size = 0.25,alpha = 0.7,colour = "black") +
    theme_gerstung(base_size = 6) +
    coord_cartesian(xlim = limits,
                    ylim = limits) +
    geom_abline(slope = 1) +
    ylab("Inferred growth") +
    xlab("Simulated growth") + 
    scale_colour_lancet(guide = F)
  
  metric_list[[col_name]] <- data.frame(
    method = col_name,
    s = c("All trajectories","Low variance"),
    R = c(cor(sub_data$fitness,sub_data$R),
          cor(sub_data_low$fitness,sub_data_low$R)),
    mse = c(mean((sub_data$fitness - sub_data$R)^2),
            mean((sub_data_low$fitness - sub_data_low$R)^2))
  )
}
## Note: Using an external vector in selections is ambiguous.
## ℹ Use `all_of(col_name)` instead of `col_name` to silence this message.
## ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
## Adding missing grouping variables: `name`
## Adding missing grouping variables: `name`
## Adding missing grouping variables: `name`
## Adding missing grouping variables: `name`
metric_df <- do.call(rbind,metric_list) %>%
  mutate(
    
    met = list(l = "Linear\ngrowth",
               cp_1 = "Early linear\ngrowth",
               cp_late = "Late linear\ngrowth",
               nl = "Sigmoidal\ngrowth")[method] %>%
      unlist
  )

r2_plot <- ggplot(data = metric_df,aes(x = reorder(met,-R^2),y = R^2,fill = s)) + 
  geom_bar(stat = "identity",position = "dodge") + 
  xlab("") + 
  ylab("R2") + 
  theme_gerstung(base_size = 6) +
  coord_flip() + 
  scale_y_continuous(expand = c(0,0)) + 
  scale_fill_lancet(guide = F)

mse_plot <- ggplot(data = metric_df,aes(x = reorder(met,-mse),y = sqrt(mse),fill = s)) + 
  geom_bar(stat = "identity",position = "dodge") + 
  xlab("") + 
  ylab("RMSE") + 
  theme_gerstung(base_size = 6) +
  coord_flip() + 
  scale_y_continuous(expand = c(0,0)) + 
  scale_fill_lancet(guide = F)

plot_grid(
  plot_list$l + ggtitle("Linear growth"),
  plot_list$cp_1 + ggtitle("Early linear growth"),
  plot_list$cp_late + ggtitle("Late linear growth"),
  plot_list$nl + ggtitle("Sigmoidal growth"),
  r2_plot,
  mse_plot,
  rel_heights = c(1,1,0.6),
  ncol = 2) + 
  ggsave("figures/simulations/validation_BNPR.pdf",height = 5,width = 4.7)

write.csv(bnpr_wf_merge_df,"data_output/simulated_bnpr_coefficients.csv",row.names = F)